#Set Options
options(warn=-1)
options(scipen=999)

#Load Libraries
library(reshape)
library(plyr)
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:reshape':
## 
##     rename, round_any
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:reshape':
## 
##     rename
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
##     here
## The following object is masked from 'package:reshape':
## 
##     stamp
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
library(xlsx)
library(sp)
library(rgdal)
## rgdal: version: 1.4-4, (SVN revision 833)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/proj
##  Linking to sp version: 1.3-1
library(viridis)
## Loading required package: viridisLite
library(sjPlot)
library(sjmisc)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(margins)
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
#Link to source for cross-tabs
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")

# Read in cleaned ODIN data from Pennsylvania [put website]
odin_pa <- read.csv("/Users/gbarboza/Desktop/ODIN_PA/Overdose_Information_Network_Data_CY_January_2018_-_Current_Monthly_County_State_Police.csv", na.strings=c("","NA"))

#Do some data cleaning
#Race and Ethnicity -------------------------------------
odin_pa$hisp<-ifelse(odin_pa$Ethnicity.Desc=='Hispanic',1,0)
odin_pa$race_eth[odin_pa$hisp == 0 & odin_pa$Race=="White"]<-"NHWhite"
odin_pa$race_eth[odin_pa$hisp == 0 & odin_pa$Race=="Black"]<-"NHBlack"
odin_pa$race_eth[odin_pa$hisp == 1 ]<-"Hisp"
odin_pa$black<-ifelse(odin_pa$Race=='Black',1,0)
odin_pa$white<-ifelse(odin_pa$Race=='White',1,0)

#Response time-------------------------------------------
odin_pa$Response.Time.Desc[odin_pa$Response.Time.Desc == "DID NOT WORK"]<-NA
odin_pa$Response.Time.Desc <- factor(odin_pa$Response.Time.Desc)

#Date & Time__________________________________
odin_pa$DT <-  mdy_hms(odin_pa$datetime)

#Insert hour, month and year as separate variables
odin_pa$hour <- hour(odin_pa$DT) 
odin_pa$month <- month(odin_pa$DT)  
odin_pa$year <- year(odin_pa$DT)  

#Ages into numeric var______________________________
odin_pa <- odin_pa %>%
  mutate(age = fct_recode(Age.Range,
                          "1" = "0 - 9"  ,
                          "1" = "10-14" ,
                          "1"= "15 - 19",
                          "2" = "20 - 24",
                          "2" = "25 - 29",
                          "3"=  "30 - 39" ,
                          "4" = "40 - 49" ,
                          "5" = "50 - 59"  ,
                          "6" = "60 - 69" ,
                          "6" = "70 - 79" ,
                          "6" = "80 - *" 
                          )
         )

#Revival Action ___________________________________         
odin_pa <- odin_pa %>%
  mutate(revived = fct_recode(Revive.Action.Desc,
                          "Arrest" = "ARREST"  ,
                          "Hospital" = "HOSPITAL CONSCIOUS" ,
                          "Hospital"="HOSPITAL UNCONSCIOUS",
                          "Refused" = "REFUSED TRANSPORT",
                          "Other" = "RELEASED",
                          "Referred to treatment"=  "TRANSPORTED TO TREATMENT" ,
                          "Referred to treatment" = "VERBALLY REFERRED TO TREATMENT" ,
                          "Other" = "OTHER"  ,
                          "NA" = "DON'T KNOW" 
                          )
  )

odin_pa <- odin_pa %>%
  mutate(drugs = fct_recode(Susp.OD.Drug.Desc,
                              "Alcohol" = "ALCOHOL"  ,
                              "Heroin" = "HEROIN" ,
                              "Fentanyl"="FENTANYL",
                              "Fentanyl" = "FENTANYL ANALOG/OTHER SYNTHETIC OPIOID",
                              "Pharma" = "PHARMACEUTICAL STIMULANT",
                              "Pharma"=  "PHARMACEUTICAL OTHER" ,
                              "Pharma" = "PHARMACEUTICAL OPIOID" ,
                              "Benzos" = "BENZODIAZEPINES (I.E.VALIUM, XANAX, ATIVAN, ETC)"  ,
                              "Methadone" = "METHADONE", 
                              "Suboxone" = "SUBOXONE",
                              "MJ" = "MARIJUANA",
                              "MJ" = "SYNTHETIC MARIJUANA",
                              "Meth" = "METHAMPHETAMINE",
                              "Carf" = "CARFENTANIL",
                              "Salts" = "BATH SALTS",
                              "unknown" = "unknown"
                        
                        )
  )

odin_pa$heroin<-ifelse(odin_pa$drugs=='Heroin',1,0)
odin_pa$fentanyl<-ifelse(odin_pa$drugs=='Fentanyl',1,0)
odin_pa$pharma<-ifelse(odin_pa$drugs=='Pharma',1,0)

#Before any analysis let's make sure we have the right selections
#In order to know how many unique incidents there were, we need to consider that each incident could have multiple victims. 
#The dataset does not take that into consideration, so we need to apply some filters
#The following dataset provides the count of all drugs involved with the incident using the
#key that identifies a unique record containing the details of a single incident (identified by date, time and location)
unique_incidents <- odin_pa[!duplicated(odin_pa[,c('Incident.ID')]),] # of the 14,400 total cases, 10,290 were unique incidents
duplicated_incidents <- odin_pa[duplicated(odin_pa[,c('Incident.ID')]),] # 4,110 were not unique, there are duplicate victims and/or multiple drugs suspected of causing the OD

#Below we get all incidents where each line represents a unique victim regardless of how many drugs were suspected 
#in causing the OD
#The following dataset contains incidents with unique victim IDs (i.e. each incident may have had more than 1 victim) using the
#key that identifies a unique record for a victim.
#According to the result, there were 10,465 unique incidents with unique victim IDs

unique_incidents_victims <- odin_pa[!duplicated(odin_pa[,c('Incident.ID','Victim.ID')]),]
duplicated_incidents_victims <- odin_pa[duplicated(odin_pa[,c('Incident.ID','Victim.ID')]),]

##Finally, this dataset selects incidents with multiple distinct victims (note, 
#some cases have multiple victimizations of the same person, this code excludes individuals with repeat victimizations 
#as it is not necessary to include them (i.e. their characteristics do not change))
multiple_unique_victims <-unique_incidents_victims %>% 
dplyr::group_by(Incident.ID) %>%
dplyr::add_count(Incident.ID) %>%
dplyr::distinct(Incident.ID, .keep_all = TRUE)

mean(multiple_unique_victims$n) #average number of persons per incident
## [1] 1.017007
table(multiple_unique_victims$n) #distribution of number of unique victims of drug OD per incident
## 
##     1     2     3     4     9 
## 10132   149     6     2     1
aggregate(n ~ race_eth, data=multiple_unique_victims, mean)
##   race_eth        n
## 1     Hisp 1.010187
## 2  NHBlack 1.037924
## 3  NHWhite 1.018672
aggregate(Dose.Count ~ race_eth, data=multiple_unique_victims, mean)
##   race_eth Dose.Count
## 1     Hisp   0.959253
## 2  NHBlack   1.051896
## 3  NHWhite   1.015674
aggregate(Dose.Unit ~ race_eth, data=multiple_unique_victims, mean)
##   race_eth Dose.Unit
## 1     Hisp  1.738540
## 2  NHBlack  2.011976
## 3  NHWhite  1.741584
#incidents and victims with each line representing a different drug suspected of causing the OD
#this dataset is the number of incidents with victims that had multiple drugs suspected of causing the OD using the
#unique identifier for each suspected drug causing the overdose. 
unique_incidents_victims_drugs <- odin_pa[!duplicated(odin_pa[,c('Incident.ID','Victim.ID', 'Victim.OD.Drug.ID')]),]
duplicated_incidents_victims_drugs <- odin_pa[duplicated(odin_pa[,c('Incident.ID','Victim.ID', 'Victim.OD.Drug.ID')]),]

multiple_unique_victims_polydrugs <-unique_incidents_victims_drugs %>% #this dataset selects incidents with multiple distinct victims and counts
  #how many drugs were suspected in causing their overdose
  dplyr::group_by(Incident.ID) %>%
  dplyr::add_count(Victim.ID) %>%
  dplyr::distinct(Victim.ID, .keep_all = TRUE)

multiple_unique_victims_polydrugs$polydrug<-ifelse(multiple_unique_victims_polydrugs$n >1,1,0)
newdata <- multiple_unique_victims_polydrugs[ which(multiple_unique_victims_polydrugs$race_eth=='NHWhite'), ]
crosstab(newdata, row.vars = "Naloxone_Admin", col.vars = "polydrug", type = "c")
##                polydrug      0      1
## Naloxone_Admin                       
## 0                        32.37  39.64
## 1                        67.63  60.36
## Sum                     100.00 100.00
chisq.test(newdata$Naloxone_Admin, newdata$polydrug, correct = FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  newdata$Naloxone_Admin and newdata$polydrug
## X-squared = 36.23, df = 1, p-value = 0.000000001754
multiple_unique_victims$missing<-ifelse(is.na(multiple_unique_victims$Survived),1,0)
#same as unique incidents
mean(multiple_unique_victims_polydrugs$n)
## [1] 1.330721
table(multiple_unique_victims_polydrugs$n)
## 
##    1    2    3    4    5    6    7 
## 7766 2148  401  107   27   14    2
aggregate(n ~ race_eth, data=multiple_unique_victims_polydrugs, mean)
##   race_eth        n
## 1     Hisp 1.332209
## 2  NHBlack 1.360308
## 3  NHWhite 1.379449
#same result using the following code
polydruguse <- aggregate(Victim.OD.Drug.ID ~ Incident.ID+Victim.ID, data = unique_incidents_victims_drugs, FUN = length)
mean(polydruguse$Victim.OD.Drug.ID)
## [1] 1.330721
table(polydruguse$Victim.OD.Drug.ID)
## 
##    1    2    3    4    5    6    7 
## 7766 2148  401  107   27   14    2
##########Descriptive statistics
table(multiple_unique_victims_polydrugs$race_eth)
## 
##    Hisp NHBlack NHWhite 
##     593     519    7474
#Chi-Square of indep vars by race
crosstab(multiple_unique_victims_polydrugs, row.vars = "age", col.vars = "race_eth", type = "c")
##     race_eth   Hisp NHBlack NHWhite
## age                                
## 1              3.71    5.01    2.09
## 2             30.86   32.37   35.88
## 3             31.70   28.13   37.33
## 4             22.43   13.10   14.84
## 5              9.11   14.26    7.60
## 6              2.19    7.13    2.26
## Sum          100.00  100.00  100.00
chisq.test(multiple_unique_victims_polydrugs$age, multiple_unique_victims_polydrugs$race_eth, correct = FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  multiple_unique_victims_polydrugs$age and multiple_unique_victims_polydrugs$race_eth
## X-squared = 137.79, df = 10, p-value < 0.00000000000000022
#Descriptives on dates
odin_pa %>%
  ggplot(aes(wday(DT, label = TRUE))) +
  geom_bar(fill = "midnightblue", alpha = 0.8) +
  labs(x = NULL,
       y = "Number of events")

race_hour_OD <- unique_incidents_victims %>% dplyr::group_by(race = race_eth, hour = hour(DT)) %>% 
  dplyr::count()  %>% tidyr::spread(race, n) 

race_month_OD <- unique_incidents_victims %>% dplyr::group_by(race = race_eth, month = month(DT)) %>% 
  dplyr::count()  %>% tidyr::spread(race, n) 

#save to excel for better plotting, I did not like any options here
#write.xlsx(data.frame(race_hour_OD), "race_hour_OD_excel.xlsx")
#write.xlsx(data.frame(race_month_OD), "race_month_OD_excel.xlsx")

DRUG_OD <- unique_incidents_victims_drugs %>% dplyr::group_by(ID = Incident.ID, drug = Susp.OD.Drug.Desc) %>% 
  dplyr::count()  %>% tidyr::spread(drug, n) 

d<- unique_incidents_victims %>% #max od in a single day
  select(DT, race_eth) %>%
  dplyr::group_by(race = race_eth, year = year(DT), month = month(DT), day = day(DT)) %>% 
  dplyr::count()  %>% tidyr::spread(race, n) 

d<- as.data.frame(d)
write.xlsx(d, "d.xlsx")

d<- unique_incidents_victims %>%
  select(DT, race_eth) %>%
  dplyr::group_by(race = race_eth, hour = hour(DT)) %>% 
  dplyr::count()  %>% 
  filter(hour>0) %>% tidyr::spread(race, n)

colors <- c("Hisp" = "blue", "NHBlack" = "red", "NHWhite" = "green")

ggplot(d, aes(x=hour)) +
  geom_line(aes(hour, Hisp*50/3, color = "Hisp"), size=.8, linetype="dashed", size=.5) +
  geom_point(aes(hour, Hisp*50/3, color = "Hisp"),size=1) +
  geom_line(aes(hour, NHBlack*50/3, color = "NHBlack"),  linetype="dashed", size=.5) +
  geom_point(aes(hour, NHBlack*50/3,  color = "NHBlack"),size=1) +
  geom_line(aes(hour, NHWhite, color = "NHWhite"), linetype="dashed", size=.5 ) +
  geom_point(aes(hour, NHWhite, color = "NHWhite"),size=1) +
  scale_y_continuous(
    name = "White", 
    sec.axis = sec_axis(~ . *3/50  , name = "Non-White")) + 
  theme(panel.background = element_rect(fill= "white", color = "black", 
     linetype = "solid"), legend.position="top")  + 
  labs(y = "Number of Overdose Responses",
       x = "Hour of Day")  + scale_colour_manual(values = colors)  + scale_color_discrete(name = "Race/Ethnicity", 
      labels=c("Latino", "Non-Hispanic Black", "Non-Hispanic White")) +
  ggtitle("Hourly Distribution of Drug Overdose Responses by Race/Ethnicity") 
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

d<- unique_incidents_victims %>%
  select(DT, race_eth) %>%
  dplyr::group_by(race = race_eth, month = month(DT)) %>% 
  dplyr::count()  %>% 
  tidyr::spread(race, n)

ggplot(d, aes(x=month)) +
  geom_line(aes(month, Hisp*50/3, color = "Hisp"), size=.8, linetype="dashed", size=.5) +
  geom_point(aes(month, Hisp*50/3, color = "Hisp"),size=1) +
  geom_line(aes(month, NHBlack*50/3, color = "NHBlack"),  linetype="dashed", size=.5) +
  geom_point(aes(month, NHBlack*50/3,  color = "NHBlack"),size=1) +
  geom_line(aes(month, NHWhite, color = "NHWhite"), linetype="dashed", size=.5 ) +
  geom_point(aes(month, NHWhite, color = "NHWhite"),size=1) +
  scale_y_continuous(
    name = "White", 
    sec.axis = sec_axis(~ . *3/50  , name = "Non-White")) + 
  theme(panel.background = element_rect(fill= "white", color = "black", 
                                        linetype = "solid"), legend.position="top")  + 
  labs(y = "Number of Overdose Responses",
       x = "Month of Year")  + scale_colour_manual(values = colors)  + scale_color_discrete(name = "Race/Ethnicity", 
  labels=c("Latino", "Non-Hispanic Black", "Non-Hispanic White")) + scale_x_continuous(breaks=c(1:12)) +
  ggtitle("Monthly Distribution of Drug Overdose Responses by Race/Ethnicity") 
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

round(prop.table(table(unique_incidents_victims$race_eth)),3)
## 
##    Hisp NHBlack NHWhite 
##   0.069   0.060   0.870
round(prop.table(table(unique_incidents_victims$age)),3)
## 
##     1     2     3     4     5     6 
## 0.023 0.346 0.364 0.153 0.086 0.028
unique_incidents_victims %>% 
  mutate(wday = wday(DT, label = TRUE)) %>% 
  ggplot(aes(x = wday)) +
  geom_bar()

unique_incidents_victims %>% #This is interesting but useless
  mutate(minute = minute(DT)) %>% 
  group_by(minute) %>% 
  summarise(
    avg_dose = mean(Dose.Unit, na.rm = TRUE),
    n = n()) %>% 
  ggplot(aes(minute, avg_dose)) +
  geom_line()

unique_incidents_victims %>% 
  filter(race_eth=="NHWhite") %>% 
  mutate(month = month(DT, label = TRUE)) %>% 
  ggplot(aes(x = month)) +
  geom_bar()

unique_incidents_victims %>% 
  filter(race_eth=="NHBlack") %>% 
  mutate(month = month(DT, label = TRUE)) %>% 
  ggplot(aes(x = month)) +
  geom_bar()

unique_incidents_victims %>% 
  filter(race_eth=="Hisp") %>% 
  mutate(month = month(DT, label = TRUE)) %>% 
  ggplot(aes(x = month)) +
  geom_bar()

unique_incidents_victims %>% 
  filter(race_eth=="NHWhite") %>% 
  count(week = floor_date(DT, "week")) %>% 
  ggplot(aes(week, n)) +
  geom_line() 

unique_incidents_victims %>% 
  filter(race_eth=="NHBlack") %>% 
  count(week = floor_date(DT, "week")) %>% 
  ggplot(aes(week, n)) +
  geom_line() 

unique_incidents_victims %>% 
  filter(race_eth=="Hisp") %>% 
  count(week = floor_date(DT, "week")) %>% 
  ggplot(aes(week, n)) +
  geom_line() 

unique_incidents_victims %>% 
  filter(race_eth=="NHWhite") %>% 
  mutate(hour = hour(DT)) %>% 
  ggplot(aes(x = hour)) +
  geom_bar()

unique_incidents_victims %>% 
  filter(race_eth=="NHBlack") %>% 
  mutate(hour = hour(DT)) %>% 
  ggplot(aes(x = hour)) +
  geom_bar()

unique_incidents_victims %>% 
  filter(race_eth=="Hisp") %>% 
  mutate(hour = hour(DT)) %>% 
  ggplot(aes(x = hour)) +
  geom_bar()

unique_incidents_victims$age_num <- as.numeric(as.character(unique_incidents_victims$age))

mod1 <- glm(Survived ~ Naloxone_Admin + age_num + hour +  month + year +
               heroin + pharma + fentanyl + Gender.Desc + white + black + black*Naloxone_Admin + white*Naloxone_Admin +
               Incident.County.Latitude+Victim.County.Longitude , family='binomial', data=unique_incidents_victims)
summary(mod1)
## 
## Call:
## glm(formula = Survived ~ Naloxone_Admin + age_num + hour + month + 
##     year + heroin + pharma + fentanyl + Gender.Desc + white + 
##     black + black * Naloxone_Admin + white * Naloxone_Admin + 
##     Incident.County.Latitude + Victim.County.Longitude, family = "binomial", 
##     data = unique_incidents_victims)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7734   0.2776   0.3577   0.5422   1.7904  
## 
## Coefficients:
##                             Estimate  Std. Error z value
## (Intercept)              -207.477251  131.588021  -1.577
## Naloxone_Admin              1.055234    1.126053   0.937
## age_num                    -0.252432    0.028354  -8.903
## hour                        0.028220    0.004284   6.587
## month                       0.009369    0.010400   0.901
## year                        0.102870    0.065157   1.579
## heroin                      0.134111    0.083995   1.597
## pharma                      0.727335    0.181632   4.004
## fentanyl                   -0.842137    0.095979  -8.774
## Gender.DescMale            -0.209888    0.070278  -2.987
## white                      -0.210459    1.008445  -0.209
## black                       0.327980    1.022947   0.321
## Incident.County.Latitude   -0.010577    0.064181  -0.165
## Victim.County.Longitude    -0.018659    0.017285  -1.080
## Naloxone_Admin:black        0.956021    1.158564   0.825
## Naloxone_Admin:white        1.121666    1.128115   0.994
##                                      Pr(>|z|)    
## (Intercept)                           0.11486    
## Naloxone_Admin                        0.34870    
## age_num                  < 0.0000000000000002 ***
## hour                          0.0000000000448 ***
## month                                 0.36766    
## year                                  0.11438    
## heroin                                0.11034    
## pharma                        0.0000621629789 ***
## fentanyl                 < 0.0000000000000002 ***
## Gender.DescMale                       0.00282 ** 
## white                                 0.83469    
## black                                 0.74850    
## Incident.County.Latitude              0.86910    
## Victim.County.Longitude               0.28035    
## Naloxone_Admin:black                  0.40927    
## Naloxone_Admin:white                  0.32009    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7960.5  on 8286  degrees of freedom
## Residual deviance: 6367.5  on 8271  degrees of freedom
##   (2178 observations deleted due to missingness)
## AIC: 6399.5
## 
## Number of Fisher Scoring iterations: 5
confint(mod1)
## Waiting for profiling to be done...
##                                  2.5 %      97.5 %
## (Intercept)              -465.80591503 50.09333570
## Naloxone_Admin             -1.25817601  3.39679880
## age_num                    -0.30805348 -0.19688534
## hour                        0.01982626  0.03662254
## month                      -0.01101711  0.02975806
## year                       -0.02466898  0.23078418
## heroin                     -0.03131306  0.29801687
## pharma                      0.37887933  1.09206857
## fentanyl                   -1.03098968 -0.65467684
## Gender.DescMale            -0.34828156 -0.07274236
## white                      -2.34468500  1.92284355
## black                      -1.82911905  2.48437297
## Incident.County.Latitude   -0.13583779  0.11580304
## Victim.County.Longitude    -0.05260415  0.01516131
## Naloxone_Admin:black       -1.43941842  3.32526055
## Naloxone_Admin:white       -1.22326216  3.43861042
round(exp(coef(mod1)-1)*100,4)
##              (Intercept)           Naloxone_Admin                  age_num 
##                   0.0000                 105.6788                  28.5809 
##                     hour                    month                     year 
##                  37.8409                  37.1342                  40.7738 
##                   heroin                   pharma                 fentanyl 
##                  42.0677                  76.1348                  15.8478 
##          Gender.DescMale                    white                    black 
##                  29.8231                  29.8060                  51.0676 
## Incident.County.Latitude  Victim.County.Longitude     Naloxone_Admin:black 
##                  36.4009                  36.1079                  95.6974 
##     Naloxone_Admin:white 
##                 112.9376
pR2(mod1)
##           llh       llhNull            G2      McFadden          r2ML 
## -3183.7565419 -4543.3366670  2719.1602503     0.2992471     0.2797260 
##          r2CU 
##     0.4200337
length(residuals(mod1)) #get number of observations
## [1] 8287
mod1_margins <- margins(mod1)

cplot(mod1, "age_num", what="predict", draw=TRUE)
##       xvals     yvals     upper     lower
## 1  1.000000 0.9114779 0.9234533 0.8995026
## 2  1.208333 0.9071418 0.9188539 0.8954297
## 3  1.416667 0.9026160 0.9140445 0.8911875
## 4  1.625000 0.8978944 0.9090268 0.8867621
## 5  1.833333 0.8929711 0.9038062 0.8821360
## 6  2.041667 0.8878400 0.8983921 0.8772879
## 7  2.250000 0.8824953 0.8927990 0.8721915
## 8  2.458333 0.8769312 0.8870470 0.8668154
## 9  2.666667 0.8711421 0.8811611 0.8611231
## 10 2.875000 0.8651226 0.8751703 0.8550749
## 11 3.083333 0.8588674 0.8691038 0.8486310
## 12 3.291667 0.8523717 0.8629869 0.8417564
## 13 3.500000 0.8456307 0.8568368 0.8344245
## 14 3.708333 0.8386401 0.8506606 0.8266197
## 15 3.916667 0.8313962 0.8444556 0.8183368
## 16 4.125000 0.8238953 0.8382122 0.8095783
## 17 4.333333 0.8161345 0.8319171 0.8003518
## 18 4.541667 0.8081113 0.8255559 0.7906668
## 19 4.750000 0.7998239 0.8191145 0.7805334
## 20 4.958333 0.7912711 0.8125804 0.7699618

allEffects(mod1)
##  model: Survived ~ Naloxone_Admin + age_num + hour + month + year + heroin + 
##     pharma + fentanyl + Gender.Desc + white + black + black * 
##     Naloxone_Admin + white * Naloxone_Admin + Incident.County.Latitude + 
##     Victim.County.Longitude
## 
##  age_num effect
## age_num
##         1         2         4         5         6 
## 0.9165814 0.8951394 0.8374646 0.8001211 0.7566903 
## 
##  hour effect
## hour
##         0       5.8        12        17        23 
## 0.8232485 0.8458206 0.8672840 0.8827005 0.8991283 
## 
##  month effect
## month
##         1         4         6         9        10 
## 0.8627379 0.8660326 0.8681918 0.8713752 0.8724216 
## 
##  year effect
## year
##      2018    2018.2    2018.5    2018.8      2019 
## 0.8635436 0.8659499 0.8694920 0.8729542 0.8752185 
## 
##  heroin effect
## heroin
##         0       0.2       0.5       0.8         1 
## 0.8597173 0.8629211 0.8676111 0.8721644 0.8751252 
## 
##  pharma effect
## pharma
##         0       0.2       0.5       0.8         1 
## 0.8653713 0.8814365 0.9024110 0.9200117 0.9300835 
## 
##  fentanyl effect
## fentanyl
##         0       0.2       0.5       0.8         1 
## 0.8859711 0.8678183 0.8360551 0.7984319 0.7699617 
## 
##  Gender.Desc effect
## Gender.Desc
##    Female      Male 
## 0.8845270 0.8612999 
## 
##  Incident.County.Latitude effect
## Incident.County.Latitude
##      39.9      40.4      40.9      41.5        42 
## 0.8695842 0.8689832 0.8683800 0.8676529 0.8670445 
## 
##  Victim.County.Longitude effect
## Victim.County.Longitude
##       -80       -79       -78       -76       -75 
## 0.8745134 0.8724514 0.8703605 0.8660912 0.8639123 
## 
##  Naloxone_Admin*black effect
##               black
## Naloxone_Admin         0       0.2       0.5       0.8         1
##            0   0.6086178 0.6241269 0.6469127 0.6690496 0.6834096
##            0.2 0.7026141 0.7238448 0.7538717 0.7816187 0.7988241
##            0.5 0.8156553 0.8386723 0.8687760 0.8939725 0.9083102
##            0.8 0.8923133 0.9115856 0.9346851 0.9520669 0.9611113
##            1   0.9264137 0.9421147 0.9598773 0.9723493 0.9784767
## 
##  Naloxone_Admin*white effect
##               white
## Naloxone_Admin         0       0.2       0.5       0.8         1
##            0   0.6592020 0.6496835 0.6351815 0.6204299 0.6104684
##            0.2 0.7077904 0.7083640 0.7092231 0.7100807 0.7106516
##            0.5 0.7724297 0.7845120 0.8017501 0.8179293 0.8281334
##            0.8 0.8262796 0.8451243 0.8702220 0.8917734 0.9043361
##            1   0.8562422 0.8772524 0.9037867 0.9250751 0.9367683
plot(effect("white", mod1))
## NOTE: white is not a high-order term in the model

plot(effect("age_num", mod1))

mod_hisp <- glm(Survived ~ Naloxone_Admin * Gender.Desc * hisp, family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("hisp"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("hisp"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")

no_naloxone_female = data.frame("hisp"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("hisp"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")

predict(mod_hisp, no_naloxone_male, type = "response")
##        1 
## 0.734375
predict(mod_hisp, naloxone_male, type = "response")
##         1 
## 0.9463722
predict(mod_hisp, no_naloxone_female, type = "response")
##         1 
## 0.7540984
predict(mod_hisp, naloxone_female, type = "response")
##         1 
## 0.8852459
mod_white <- glm(Survived ~  Naloxone_Admin * Gender.Desc * white , family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("white"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("white"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")

no_naloxone_female = data.frame("white"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("white"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")

predict(mod_white, no_naloxone_male, type = "response")
##         1 
## 0.5851962
predict(mod_white, naloxone_male, type = "response")
##         1 
## 0.9269917
predict(mod_white, no_naloxone_female, type = "response")
##         1 
## 0.6568421
predict(mod_white, naloxone_female, type = "response")
##         1 
## 0.9330986
#Note, for plots the order in which variables enter matters
mod_black <- glm(Survived ~ Naloxone_Admin *  Gender.Desc * black , family='binomial', data=unique_incidents_victims)
no_naloxone_male = data.frame("black"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Male")
naloxone_male = data.frame("black"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Male")

no_naloxone_female = data.frame("black"=1, "Naloxone_Admin"=0, "Gender.Desc" = "Female")
naloxone_female = data.frame("black"=1, "Naloxone_Admin"=1, "Gender.Desc" = "Female")

predict(mod_black, no_naloxone_male, type = "response")
##         1 
## 0.7037037
predict(mod_black, naloxone_male, type = "response")
##         1 
## 0.9513514
predict(mod_black, no_naloxone_female, type = "response")
##         1 
## 0.7727273
predict(mod_black, naloxone_female, type = "response")
##         1 
## 0.9237288
theme_set(theme_sjplot())
plot_model(mod_white, type = "pred", terms = c("Naloxone_Admin", "white"))

plot_model(mod_black, type = "pred", terms = c("Naloxone_Admin", "black"))

plot_model(mod_hisp, type = "pred", terms = c("Naloxone_Admin", "hisp"))

plot_model(mod_hisp, type = "int")

plot_model(mod_white, type = "int")

plot_model(mod_black, type = "int")

#######################
x<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = unique_incidents_victims)
x_black<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, black==1))
x_white<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, white==1))
x_hisp<-aggregate(Incident.ID ~ Incident.County.Name, FUN = length, data = subset(unique_incidents_victims, hisp==1))


pop_dat <- read.csv("/Users/gbarboza/Desktop/ODIN_PA/pop.csv")
x <- merge(x_black, x, by = "Incident.County.Name", all.y = TRUE)
x <- merge(x_white, x, by = "Incident.County.Name", all.y = TRUE)
x <- merge(x_hisp, x, by = "Incident.County.Name", all.y = TRUE)

names(x)[1] <- "NAME"
names(x)[2] <- "nOD_hisp"
names(x)[3] <- "nOD_white"
names(x)[4] <- "nOD_black"
names(x)[5] <- "nOD_all"

x[is.na(x)] = 0


overdose_rates <- merge(x, pop_dat, by = "NAME")

overdose_rates$nOD_all_rate <- ((overdose_rates$nOD_all/overdose_rates$ACS_17_5YR)*100000)/2
overdose_rates$nOD_hisp_rate <- ((overdose_rates$nOD_hisp/overdose_rates$pophisp)*100000)/2
overdose_rates$nOD_white_rate <- ((overdose_rates$nOD_white/overdose_rates$popwhite)*100000)/2
overdose_rates$nOD_black_rate <- ((overdose_rates$nOD_black/overdose_rates$popblack)*100000)/2

penn <- readOGR("/Users/gbarboza/Desktop/OD in PA/", "county demographics")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/gbarboza/Desktop/OD in PA", layer: "county demographics"
## with 67 features
## It has 27 fields
## Integer64 fields read as strings:  newAggct popdens urban CL
plot(penn)

overdose_rates <- overdose_rates %>% mutate(urban = factor(urban, levels = c(0, 1), labels = c("Rural", "Urban")))

gd <- overdose_rates %>% 
  group_by(urban) %>% 
  summarise(nOD_all_rate = mean(nOD_all_rate))

ggplot(overdose_rates, aes(x = urban, y = nOD_all_rate, color = urban, fill = urban)) +
  geom_bar(data = gd, stat = "identity", alpha = .3) +
  ggrepel::geom_text_repel(aes(label = overdose_rates$NAME), color = "black", size = 2.5, segment.color = "grey") +
  geom_point() +
  guides(color = "none", fill = "none") +
  theme_bw() +
  labs(
    title = "Heroin Overdose Response Rate by Urbanicity",
    subtitle = "Source: Pennsylvania Overdose Information Network (2018 - 2019)",
    x = "Region",
    y = "OD RATE"
  )

# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]

no_classes <- 5
labels <- c()

#all

quantiles <- quantile(merged$nOD_all_rate, probs = seq(0, 1, length.out = no_classes + 1))

for(idx in 1:length(quantiles)){
  labels <- c(labels, paste0(round(quantiles[idx], 2), 
                             " – ", 
                             round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]

merged$OD_all_quantiles <- cut(merged$nOD_all_rate, 
breaks = quantiles, 
labels = labels, 
include.lowest = T)


p <- ggplot() +
  geom_polygon(data = merged, aes(fill = OD_all_quantiles, 
                                    x = long, 
                                    y = lat, 
                                    group = group)) +
  coord_equal() +
  labs(x = NULL, 
       y = NULL, 
       title = "Spatial Distribution of Overdose Response Rates", 
       subtitle = "Pennsylvania Counties", 
       caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
  # now the discrete-option is used, 
  # and we use guide_legend instead of guide_colourbar
  scale_fill_viridis(
    option = "cividis",
    name = "Overdose Response Rate",
    discrete = T,
    direction = -1,
    guide = guide_legend(
      keyheight = unit(5, units = "mm"),
      title.position = 'top',
      reverse = T
    ))
p

#white
# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]

no_classes <- 8
labels <- c()

quantiles <- quantile(merged$nOD_white_rate, probs = seq(0, 1, length.out = no_classes + 1))

for(idx in 1:length(quantiles)){
  labels <- c(labels, paste0(round(quantiles[idx], 2), 
                             " – ", 
                             round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]

merged$OD_white_quantiles <- cut(merged$nOD_white_rate, 
                               breaks = quantiles, 
                               labels = labels, 
                               include.lowest = T)


p <- ggplot() +
  geom_polygon(data = merged, aes(fill = OD_white_quantiles, 
                                  x = long, 
                                  y = lat, 
                                  group = group)) +
  coord_equal() +
  labs(x = NULL, 
       y = NULL, 
       title = "Spatial Distribution of Overdose Response Rates [Whites]", 
       subtitle = "Pennsylvania Counties", 
       caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
  # now the discrete-option is used, 
  # and we use guide_legend instead of guide_colourbar
  scale_fill_viridis(
    option = "cividis",
    name = "Overdose Response Rate [Whites]",
    discrete = T,
    direction = -1,
    guide = guide_legend(
      keyheight = unit(5, units = "mm"),
      title.position = 'top',
      reverse = T
    ))
p

#black

# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]

no_classes <- 10
labels <- c()

quantiles <- unique(quantile(merged$nOD_black_rate, probs = seq(0, 1, length.out = no_classes + 1)))

for(idx in 1:length(quantiles)){
  labels <- c(labels, paste0(round(quantiles[idx], 2), 
                             " – ", 
                             round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]

merged$OD_black_quantiles <- cut(merged$nOD_black_rate, 
                                 breaks = quantiles, 
                                 labels = labels, 
                                 include.lowest = T)


p <- ggplot() +
  geom_polygon(data = merged, aes(fill = OD_black_quantiles, 
                                  x = long, 
                                  y = lat, 
                                  group = group)) +
  coord_equal() +
  labs(x = NULL, 
       y = NULL, 
       title = "Spatial Distribution of Overdose Response Rates [Black]", 
       subtitle = "Pennsylvania Counties", 
       caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
  # now the discrete-option is used, 
  # and we use guide_legend instead of guide_colourbar
  scale_fill_viridis(
    option = "cividis",
    name = "Overdose Response Rate [Black]",
    discrete = T,
    direction = -1,
    guide = guide_legend(
      keyheight = unit(5, units = "mm"),
      title.position = 'top',
      reverse = T
    ))
p

#hisp

# We create the data foundation from the shapefile
penn@data$id = rownames(penn@data)
penn.points = fortify(penn, region="id")
## SpP is invalid
penn.df = join(penn.points, penn@data, by="id")
# We merge with churn regional data
merged <- merge(penn.df, overdose_rates, by = "NAME")
merged <- merged[order(merged$order), ]

no_classes <- 14
labels <- c()

quantiles <- unique(quantile(merged$nOD_hisp_rate, probs = seq(0, 1, length.out = no_classes + 1)))

for(idx in 1:length(quantiles)){
  labels <- c(labels, paste0(round(quantiles[idx], 2), 
                             " – ", 
                             round(quantiles[idx + 1], 2)))
}
labels <- labels[1:length(labels)-1]

merged$OD_hisp_quantiles <- cut(merged$nOD_hisp_rate, 
                                 breaks = quantiles, 
                                 labels = labels, 
                                 include.lowest = T)


p <- ggplot() +
  geom_polygon(data = merged, aes(fill = OD_hisp_quantiles, 
                                  x = long, 
                                  y = lat, 
                                  group = group)) +
  coord_equal() +
  labs(x = NULL, 
       y = NULL, 
       title = "Spatial Distribution of Overdose Response Rates [Latinos]", 
       subtitle = "Pennsylvania Counties", 
       caption = "Source: Pennsylvania Overdose Information Network (2018-2019)") +
  # now the discrete-option is used, 
  # and we use guide_legend instead of guide_colourbar
  scale_fill_viridis(
    option = "cividis",
    name = "Overdose Response Rate [Latinos]",
    discrete = T,
    direction = -1,
    guide = guide_legend(
      keyheight = unit(5, units = "mm"),
      title.position = 'top',
      reverse = T
    ))
p